var fs = require('fs');
var url = require('url');
var https = require('https');
var async = require('async');
var mysql = require('mysql');
var cheerio = require('cheerio');
var phantom = require('node-phantom');
var querystring = require('querystring');
var MongoClient = require('mongodb').MongoClient;
var Vtil = require('../src/vtil');

var WeiboScrapy = function(options) {

    // Closure-private state; only the members listed in the final `return` are exposed.
    var innerMod = {};
    var cookiePath = __dirname + '/cookie.dat';
    var picSource = 'http://baike.so.com/resource/other/bkeditor/server/getImgJson.php';

    // Default settings; every key may be overridden through the `options` argument.
    innerMod.options = {
        debug: false,            // when true, dumpFile is forced to ../runtime/output.json below
        delay: 200,              // ms waited after a phantom page opens before it is evaluated
        dumpFile: '',
        logFile: '',
        appKey: '4209225449',    // sent as `source` to the API when no accessToken is set
        accessToken: '',
        cookieFile: cookiePath,  // file whose content is sent as the Cookie header
        os: '',                  // falls back to os.platform() below when empty
        eraseFiles: true,        // queryTags removes logFile and dumpFile when true
        usePhantom: false,       // selects phantomGet over plainGetFeeds in get()
        sleep: 30000             // ms waited before a queue task is reported as done
    };

    innerMod.mysqlConf = {
        host: 'localhost',
        user: 'root',
        password: 'root',
        database: 'campaign'
    };

    innerMod.mongoConf = {
        user: '',
        password: '',
        host: 'localhost',
        port: 27017,
        database: 'campaign',
        limit: 10
    };

    // Apply the caller's overrides first so everything below sees the effective settings.
    for (var attr in options) {
        innerMod.options[attr] = options[attr];
    }

    if (innerMod.options.debug) {
        innerMod.options.dumpFile = '../runtime/output.json';
    }
    if (!innerMod.options.os) {
        innerMod.options.os = require('os').platform();
    }
    if ('linux' === innerMod.options.os) {
        innerMod.mysqlConf.socketPath = '/var/run/mysqld/mysqld.sock';
    }

    // Created only after the merge: building it earlier always passed the default
    // (empty) logFile, so a caller-supplied logFile never reached the logger.
    var vtil = Vtil({
        logFile: innerMod.options.logFile
    });


    /**
     * Collects the `mid` attribute of every `.feed_list` element in the given HTML and
     * requests each status from the Weibo `statuses/show` API, one request at a time.
     *
     * @param {string} feedListText - HTML markup to scan for `.feed_list` elements.
     * @param {Function} callback - Called with the array of parsed API responses once
     *     every request succeeded. It is not called when a request or parse fails;
     *     the failure is only logged.
     * @throws {Error} Rethrows (asynchronously) a failure to read the cookie file.
     */
    innerMod._getFeeds = function(feedListText, callback) {
        // Declared locally so the cheerio handle does not become an implicit global.
        var $ = cheerio.load(feedListText);
        var feedMids = [];
        var weiboReqs = [];
        var self = this;
        $('.feed_list').each(function() {
            var $this = $(this);
            feedMids.push($this.attr('mid'));
        });

        fs.readFile(this.options.cookieFile, 'utf-8', function(err, cookie){
            if (err) throw err;
            feedMids.forEach(function(mid){
                weiboReqs.push(function(cb){
                    var detailUrl = 'https://api.weibo.com/2/statuses/show.json?id=' + mid;
                    // Prefer the OAuth token; fall back to the application key.
                    if (self.options.accessToken) {
                        detailUrl += '&access_token=' + self.options.accessToken;
                    } else {
                        detailUrl += '&source=' + self.options.appKey;
                    }
                    vtil.log('Call sina api with weibo mid: ' + mid);
                    var options = url.parse(detailUrl);
                    options.headers = {'Cookie': cookie};
                    vtil.get(options, function(err, data){
                        if (err) {
                            vtil.log('Exception occurs:' + err);
                            // Do not touch `data` here: it may be undefined on failure.
                            return cb(err);
                        }
                        var feed;
                        try {
                            feed = JSON.parse(data.toString());
                        } catch (parseErr) {
                            // A non-JSON body is reported like any other request failure.
                            vtil.log('Exception occurs:' + parseErr);
                            return cb(parseErr);
                        }
                        cb(null, feed);
                    });
                });
            });

            async.series(weiboReqs, function(err, results){
                err ? vtil.log('Exception occurs:' + err) : innerMod._exeCallback(callback, results);
            });
        });
    };

    /**
     * Removes a file if it exists; a missing file or an empty name is a no-op.
     *
     * @param {string} fileName - Path of the file to delete.
     * @throws {Error} Rethrows (asynchronously) any unlink failure other than ENOENT.
     */
    innerMod._deleteFile = function(fileName) {
        if (!fileName) {
            return;
        }
        // Unlink directly instead of calling the deprecated fs.exists first: this
        // removes the window in which the file can vanish between check and delete.
        fs.unlink(fileName, function (err) {
            if (err) {
                if ('ENOENT' === err.code) {
                    return; // nothing to delete
                }
                throw err;
            }
            vtil.log('successfully deleted ' + fileName);
        });
    };

    /**
     * Invokes `callback` with every argument that follows it, provided it is a function.
     * Anything that is not a function is ignored; the callback's result is discarded.
     *
     * @param {Function} [callback] - Function to invoke.
     */
    innerMod._exeCallback = function(callback) {
        if ('function' !== typeof callback) {
            return;
        }
        var forwarded = Array.prototype.slice.call(arguments, 1);
        callback.apply(null, forwarded);
    };

    /**
     * Appends `json`, pretty-printed with a 4-space indent and a trailing newline,
     * to the configured dump file. Does nothing unless debug mode is on and a dump
     * file is configured.
     *
     * @param {*} json - Value to serialize.
     * @throws {Error} Rethrows (asynchronously) a failure to append to the file.
     */
    innerMod._dumpData = function(json){
        var target = this.options.dumpFile;
        if (!this.options.debug || !target) {
            return;
        }
        var serialized = JSON.stringify(json, null, 4) + '\n';
        fs.appendFile(target, serialized, function(writeErr){
            if (writeErr) {
                throw writeErr;
            }
            vtil.log('JSON data is written to ' + target);
        });
    };

    /**
     * Builds a `mongodb://` connection string from a configuration object.
     * Empty or missing parts are left out; credentials are added only when both
     * user and password are set.
     *
     * @param {Object} [config] - Connection settings (`user`, `password`, `host`,
     *     `port`, `database`); defaults to `this.mongoConf`.
     * @returns {string} The connection string.
     */
    innerMod._getMongoURL = function(config) {
        var conf = config || this.mongoConf;
        var mongoUrl = 'mongodb://';
        if (conf.user && conf.password) {
            // Percent-encode credentials: characters such as '@', ':' or '/' would
            // otherwise be read as URL delimiters and corrupt the connection string.
            mongoUrl += encodeURIComponent(conf.user) + ':' + encodeURIComponent(conf.password) + '@';
        }
        if (conf.host) {
            mongoUrl += conf.host;
        }
        if (conf.port) {
            mongoUrl += ':' + conf.port;
        }
        if (conf.database) {
            mongoUrl += '/' + conf.database;
        }
        return mongoUrl;
    };

    /**
     * Opens a MongoDB connection using the module's mongo configuration and hands
     * the database together with its `feeds` collection to `callback`.
     * A connection failure is only logged; `callback` is not called in that case.
     *
     * @param {Function} callback - Called as `callback(db, collection)` on success.
     */
    innerMod._connectMongo = function(callback) {
        var target = innerMod._getMongoURL();
        MongoClient.connect(target, function(connectErr, database){
            if (connectErr) {
                vtil.log('Fail to connect MongoDB ' + JSON.stringify(connectErr));
                return;
            }
            vtil.log('Connected to the mongo server at: ' + target);
            var feedCollection = database.collection('feeds');
            innerMod._exeCallback(callback, database, feedCollection);
        });
    };

    /**
     * Loads `url` in PhantomJS, waits `options.delay` ms, extracts the outer HTML of
     * the `.feed_lists` element and resolves the feeds it contains.
     *
     * @param {string} url - Page to open.
     * @param {Function} callback - Called with the fetched feeds on success. It is not
     *     called when the page cannot be loaded or the feed list is missing.
     */
    innerMod.phantomGet = function(url, callback) {
        // The bundled linux binary carries a "-32" suffix in its file name.
        var bit = ('linux' === innerMod.options.os) ? '-32' : '';
        var phantomPath = __dirname + '/../bin/' + innerMod.options.os + '/phantomjs' + bit;

        phantom.create(function(err, ph) {
            if (err) {
                vtil.log('Exception occurs:' + err);
                return null;
            }

            // Logs the failure and stops the PhantomJS process so it is not leaked.
            var abort = function(reason) {
                vtil.log('Exception occurs:' + reason);
                ph.exit();
            };

            return ph.createPage(function(err, page) {
                if (err || !page) {
                    return abort(err || 'no page was created');
                }
                return page.open(url, function(err, status) {
                    if (err) {
                        return abort(err);
                    }
                    vtil.log("status: ", status);
                    // Wait before reading the DOM so the page has time to render.
                    setTimeout(function() {
                        page.evaluate(function() {
                            return document.querySelector('.feed_lists').outerHTML;
                        }, function(err, feedListText) {
                            if (feedListText) {
                                innerMod._getFeeds(feedListText, function(json){
                                    innerMod._dumpData(json);
                                    innerMod._exeCallback(callback, json);
                                    ph.exit();
                                });
                            } else {
                                err && vtil.log('Exception occurs:' + err);
                                vtil.log('You are restricted, please enable it again');
                                ph.exit();
                                return;
                            }
                        });
                    }, innerMod.options.delay);
                });
            });
        }, {phantomPath: phantomPath});
    };

    /**
     * Downloads `feedUrl` with the stored cookie, cuts the embedded
     * `pl_wb_feedlist` JSON module out of the page and resolves the feeds found in
     * its `html` field.
     *
     * @param {string} feedUrl - Search page to download.
     * @param {Function} callback - Called with the fetched feeds on success. It is not
     *     called when the request fails or the module cannot be found or parsed.
     * @throws {Error} Rethrows (asynchronously) a failure to read the cookie file.
     */
    innerMod.plainGetFeeds = function(feedUrl, callback) {
        fs.readFile(this.options.cookieFile, 'utf-8', function(err, cookie){
            if (err) throw err;
            var options = url.parse(feedUrl);
            options.headers = {'Cookie': cookie};
            vtil.get(options, function(err, data){
                if (err) {
                    vtil.log('Exception occurs:' + err);
                    return;
                }
                var html = data.toString();
                // indexOf matches the marker literally; search() would first compile
                // the string into a regular expression.
                var start = html.indexOf('{"pid":"pl_wb_feedlist"');
                if (-1 === start) {
                    vtil.log('You are restricted, please enable it again');
                    return;
                }
                var end = html.indexOf('})</script>', start);
                if (-1 === end) {
                    // Without the terminator the module cannot be delimited.
                    vtil.log('Exception occurs:feed list module is not terminated');
                    return;
                }
                // Keep the closing brace, drop the ")" that follows it.
                var moduleJSON = html.slice(start, end + 1);
                var feedModule;
                try {
                    feedModule = JSON.parse(moduleJSON);
                } catch (parseErr) {
                    vtil.log('Exception occurs:' + parseErr);
                    return;
                }
                innerMod._getFeeds(feedModule['html'], function(json){
                    innerMod._dumpData(json);
                    innerMod._exeCallback(callback, json);
                });
            });
        });
    };

    /**
     * Crawls a feed page, using PhantomJS when `options.usePhantom` is set and a
     * plain HTTP request otherwise.
     *
     * @param {string} url - Page to crawl.
     * @param {Function} callback - Forwarded to the selected fetcher.
     */
    innerMod.get = function(url, callback) {
        vtil.log('Crawling url:' + url);
        var fetcher = innerMod.options.usePhantom ? innerMod.phantomGet : innerMod.plainGetFeeds;
        // Invoke on innerMod so the fetcher sees the same `this` as a direct method call.
        fetcher.call(innerMod, url, callback);
    };

    /**
     * Reads every row of the tag table from MySQL and passes the rows to `callback`.
     * When `options.eraseFiles` is set, the log file and the dump file are removed first.
     *
     * @param {Function} callback - Called with the selected rows.
     * @param {Object} [config] - MySQL connection settings; an optional `table`
     *     key names the table to read (default `tag`). Defaults to the module's
     *     mysql configuration.
     * @throws {Error} Rethrows (asynchronously) a query failure.
     */
    innerMod.queryTags = function(callback, config) {
        var conf = config || innerMod.mysqlConf;
        var table = conf.table ? conf.table : 'tag';
        if (innerMod.options.eraseFiles) {
            innerMod._deleteFile(innerMod.options.logFile);
            innerMod._deleteFile(innerMod.options.dumpFile);
        }
        var connection = mysql.createConnection(conf);
        connection.connect(function(err){
            if (err) vtil.log('Fail to connect mySQL ' + JSON.stringify(err));
        });
        // `??` makes the driver escape the table name as an identifier instead of
        // splicing the raw string into the statement.
        connection.query('SELECT * FROM ??', [table], function(err, rows, fields) {
            if (err) throw err;
            vtil.log('Get tags form DB: ', rows);
            innerMod._exeCallback(callback, rows);
        });
        // end() lets the queued query finish before the connection is closed.
        connection.end();
    };

    /**
     * Upserts the given feeds (keyed by `id`) into the `feeds` collection, logs how
     * many were inserted and updated, and reports the collection's total size.
     * Entries carrying `error` or `error_code` are skipped. Each stored feed's
     * `created_at` is converted to a millisecond timestamp in place.
     *
     * @param {Array<Object>} feeds - Feeds to store.
     * @param {Function} [callback] - Called with the total number of stored feeds;
     *     not called when counting fails.
     * @param {Object} [config] - Unused; kept for interface compatibility.
     */
    innerMod.storeFeeds = function(feeds, callback, config) {
        innerMod._connectMongo(function(db, collection){
            // Marks an upsert that failed, so it is counted neither as inserted nor updated.
            var FAILED = {};
            var feedInsertions = [];
            (feeds || []).forEach(function(feed){
                if (!feed.error && !feed.error_code) {
                    feedInsertions.push(function(cb){
                        feed['created_at'] = new Date(feed['created_at']).getTime();
                        collection.update({'id': feed['id']}, {$set: feed}, {upsert: true}, function(err, count, info) {
                            if (err) {
                                vtil.log('Exception occurs:' + err);
                                // Always complete the task: otherwise async.parallel never
                                // finishes and the connection below is never closed.
                                return cb(null, FAILED);
                            }
                            var opt = info.updatedExisting ? 'updated' : 'inserted';
                            vtil.log('One feed is ' + opt);
                            cb(null, info.updatedExisting);
                        });
                    });
                }
            });
            async.parallel(feedInsertions, function(err, results){
                err && vtil.log('Exception occurs:' + err);
                collection.count(function(err, total) {
                    if (err) {
                        vtil.log('Exception occurs:' + err);
                        // Release the connection on the failure path as well.
                        db.close();
                        vtil.log('Closed mongo connection');
                        return;
                    }
                    var inserted = 0;
                    var updated = 0;
                    (results || []).forEach(function(updatedExisting){
                        if (FAILED === updatedExisting) {
                            return;
                        }
                        updatedExisting ? (++updated) : (++inserted);
                    });
                    innerMod._exeCallback(callback, total);
                    vtil.log('There are ' + total + ' feeds now');
                    vtil.log(inserted + ' feeds are inserted');
                    vtil.log(updated + ' feeds are updated');
                    db.close();
                    vtil.log('Closed mongo connection');
                });
            });
        });
    };

    /**
     * Looks up feeds matching `queryEntity` in the `feeds` collection and passes the
     * documents to `callback`; the connection is closed afterwards. A query error is
     * only logged and `callback` is not called.
     *
     * @param {Object} queryEntity - Query passed to `collection.find`.
     * @param {Function} callback - Called with the array of matching documents.
     * @param {number} [limit] - Maximum number of documents; a falsy value selects
     *     the module's configured mongo limit.
     */
    innerMod.queryFeeds = function(queryEntity, callback, limit) {
        innerMod._connectMongo(function(db, collection){
            var maxDocs = limit ? limit : innerMod.mongoConf.limit;
            var cursor = collection.find(queryEntity).limit(maxDocs);
            cursor.toArray(function(err, docs){
                if (err) {
                    vtil.log('Exception occurs:' + err);
                } else {
                    innerMod._exeCallback(callback, docs);
                }
                db.close();
            });
        });
    };

    /**
     * Work queue of tag searches. Each task (`{name: <tag>}`) crawls the hot search
     * page for that tag, stores the resulting feeds and reports completion only
     * after `options.sleep` ms have passed.
     */
    innerMod._queue = async.queue(function(opts, done){
        var tagName = opts.name;
        vtil.log('querying tag ' + tagName);
        var searchUrl = 'http://s.weibo.com/wb/' + tagName + '&xsort=hot&page=1';
        var onFeeds = function(feeds){
            innerMod.storeFeeds(feeds);
            // Completion is delayed by the configured sleep interval.
            setTimeout(done, innerMod.options.sleep);
        };
        innerMod.get(searchUrl, onFeeds);
    });

    /**
     * Starts a crawl: loads all tags from MySQL and queues one search task per tag.
     */
    innerMod.crawl = function() {
        var enqueue = function(tag){
            innerMod._queue.push({name: tag['name']});
        };
        innerMod.queryTags(function(tags){
            tags.forEach(enqueue);
        });
    };

    /**
     * Queries the picture source for `keyword` and saves every entry that has an
     * `img` URL as `images/<index>.jpg`. Failures are logged and the affected
     * download is skipped.
     *
     * @param {string} keyword - Search term sent as the `q` parameter.
     */
    innerMod.pics =  function(keyword) {
        vtil.get(picSource, {q: keyword}, function(err, data){
            if (err) {
                // `data` may be undefined on failure, so stop before touching it.
                vtil.log('Exception occurs:' + err);
                return;
            }
            var pics;
            try {
                pics = JSON.parse(data.toString()).list;
            } catch (parseErr) {
                vtil.log('Exception occurs:' + parseErr);
                return;
            }
            if (!Array.isArray(pics)) {
                vtil.log('Exception occurs:picture list is missing from the response');
                return;
            }
            vtil.log(pics.length);
            pics.forEach(function(pic, idx){
                if (pic && pic.img) {
                    vtil.get(pic.img, function(err, picData){
                        if (err) {
                            // Skip the write instead of saving an invalid payload.
                            vtil.log('Exception occurs:' + err);
                            return;
                        }
                        fs.writeFile('images/' + idx + '.jpg', picData, function(err){
                            err && vtil.log('Exception occurs:' + err);
                        });
                    });
                }
            });
        });
    };

    return {
        get: innerMod.get,
        queryTags: innerMod.queryTags,
        storeFeeds: innerMod.storeFeeds,
        queryFeeds: innerMod.queryFeeds,
        crawl: innerMod.crawl,
        pics: innerMod.pics
    };
};

// Export the WeiboScrapy factory as the module itself; the local `exports`
// binding is reassigned too, so both names refer to the same function.
exports = module.exports = WeiboScrapy;
